﻿
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.IO;

namespace CatEars.Core.FileFormat.Csv
{
    /// <summary>
    /// Csv文件读取类
    /// </summary>
    public static class CsvReader
    {
        #region ReadDataTable
        /// <summary>
        /// Reads CSV content from a reader into a newly created <see cref="DataTable"/>.
        /// </summary>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used.</param>
        /// <param name="sr">Reader that supplies the CSV text.</param>
        /// <param name="callback">Optional progress callback, invoked after each data row is added with the table's current row count.</param>
        /// <returns>The populated table; every cell is assigned a string value.</returns>
        public static DataTable ReadDataTable(
            CsvDefine csvDefine,
            StreamReader sr,
            Action<int> callback = null)
        {
            if (csvDefine == null)
            {
                csvDefine = CsvDefine.Default;
            }

            DataTable table = new DataTable();
            // Tracks the column names already used so duplicates can be renamed (case-insensitive).
            HashSet<string> usedNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            if (csvDefine.FristRowIsHeader)
            {
                string[] header = CsvCore.ReadOneCsvRow(csvDefine, sr);
                if (header != null)
                {
                    for (int i = 0; i < header.Length; i++)
                    {
                        AddColForReadDataTable(table, header[i], usedNames);
                    }
                }
            }

            foreach (string[] cells in CsvCore.ReadCsvToEnd(csvDefine, sr))
            {
                // A row wider than the table grows the table with auto-named columns.
                for (int missing = cells.Length - table.Columns.Count; missing > 0; missing--)
                {
                    AddColForReadDataTable(table, null, usedNames);
                }

                int columnCount = table.Columns.Count;
                DataRow added = table.Rows.Add();
                for (int col = 0; col < columnCount; col++)
                {
                    // Columns beyond the end of a short row are filled with empty strings.
                    added[col] = col < cells.Length ? cells[col] : string.Empty;
                }

                if (callback != null)
                {
                    callback(table.Rows.Count);
                }
            }

            return table;
        }

        /// <summary>
        /// Appends one column to the table, generating a name when none is given and
        /// renaming it when the name has already been used.
        /// </summary>
        /// <param name="dt">Table that receives the new column.</param>
        /// <param name="strColName">Requested column name; when null or empty, "_Column" followed by the 1-based position of the new column is used.</param>
        /// <param name="hsColNameUnique">Set of names already taken; the final name is added to it.</param>
        private static void AddColForReadDataTable(
            DataTable dt,
            string strColName,
            HashSet<string> hsColNameUnique)
        {
            string baseName = string.IsNullOrEmpty(strColName)
                ? "_Column" + (dt.Columns.Count + 1)
                : strColName;

            // Try baseName, then baseName_1, baseName_2, ... until an unused name is found.
            string candidate = baseName;
            for (int suffix = 1; hsColNameUnique.Contains(candidate); suffix++)
            {
                candidate = baseName + "_" + suffix;
            }

            dt.Columns.Add(candidate);
            hsColNameUnique.Add(candidate);
        }

        #endregion

        #region ReadDataTable重载
        /// <summary>
        /// Opens a CSV file for reading and loads its content into a <see cref="DataTable"/>.
        /// </summary>
        /// <param name="csvDefine">CSV format definition; when null, the default format is used.</param>
        /// <param name="strFilePath">Path of the CSV file to read.</param>
        /// <param name="callback">Optional progress callback, forwarded to the stream-based overload.</param>
        /// <returns>The populated table.</returns>
        public static DataTable ReadDataTable(
            CsvDefine csvDefine,
            string strFilePath,
            Action<int> callback = null)
        {
            // The table is fully built before the using block ends, so closing the file here is safe.
            using (Stream input = File.OpenRead(strFilePath))
            {
                DataTable table = ReadDataTable(csvDefine, input, callback);
                return table;
            }
        }

        /// <summary>
        /// Reads CSV content from a stream into a <see cref="DataTable"/>, decoding it with
        /// the encoding supplied by the format definition.
        /// </summary>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used.</param>
        /// <param name="stream">Stream that supplies the CSV bytes; it is not disposed by this method.</param>
        /// <param name="callback">Optional progress callback, forwarded to the reader-based overload.</param>
        /// <returns>The populated table.</returns>
        public static DataTable ReadDataTable(
            CsvDefine csvDefine,
            Stream stream,
            Action<int> callback = null)
        {
            if (csvDefine == null)
            {
                csvDefine = CsvDefine.Default;
            }

            // The reader is deliberately left undisposed: the caller owns the stream and disposes it.
            return ReadDataTable(
                csvDefine,
                new StreamReader(stream, csvDefine.GetEncoding()),
                callback);
        }
        #endregion

        #region ReadRowsByHeader
        /// <summary>
        /// Lazily reads data rows from a CSV reader and projects each row onto the requested columns.
        /// </summary>
        /// <remarks>
        /// This method is an iterator: argument validation, the header read and every row read
        /// happen only while the returned sequence is being enumerated.
        /// </remarks>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used with FristRowIsHeader set to true. Otherwise FristRowIsHeader must already be true.</param>
        /// <param name="sr">Reader that supplies the CSV text.</param>
        /// <param name="strColumnArr">Column names to extract, matched case-insensitively against the header row; the output follows this order.</param>
        /// <param name="blnErrorOnNotFoundColumn">When true, a requested column missing from the header raises an exception; when false it is returned as an empty string.</param>
        /// <returns>
        /// One array per data row with the same length as <paramref name="strColumnArr"/>. A column
        /// that is absent from the header, or that lies beyond the end of a short row, yields an empty string.
        /// </returns>
        /// <exception cref="NotSupportedException"><paramref name="strColumnArr"/> is null or empty.</exception>
        /// <exception cref="ArgumentException"><paramref name="csvDefine"/> does not have FristRowIsHeader set.</exception>
        /// <exception cref="KeyNotFoundException">A requested column is missing and <paramref name="blnErrorOnNotFoundColumn"/> is true.</exception>
        public static IEnumerable<string[]> ReadRowsByHeader(
            CsvDefine csvDefine,
            StreamReader sr,
            string[] strColumnArr,
            bool blnErrorOnNotFoundColumn = false)
        {
            if (strColumnArr.IsNullOrEmpty())
            {
                throw new NotSupportedException("未指定strColumnArr不能使用该方法");
            }
            if (csvDefine == null)
            {
                // NOTE(review): if CsvDefine.Default returns a shared instance, this assignment
                // changes it for every other user - confirm against CsvDefine.
                csvDefine = CsvDefine.Default;
                csvDefine.FristRowIsHeader = true;
            }
            else if (!csvDefine.FristRowIsHeader)
            {
                throw new ArgumentException("csvDefine的FristRowIsHeader必须设置为true");
            }
            string[] strHeader = CsvCore.ReadOneCsvRow(csvDefine, sr);
            if (strHeader.IsNullOrEmpty())
            {
                // No header row means there is nothing to map against.
                yield break;
            }

            // Header name -> column index. Walking backwards lets the first occurrence of a
            // duplicated header name win, because it is written last.
            Dictionary<string, int> dictColumn = new Dictionary<string, int>(
                StringComparer.OrdinalIgnoreCase);
            for (int i = strHeader.Length - 1; i >= 0; i--)
            {
                dictColumn[strHeader[i]] = i;
            }

            // intArrMapping[i] is the file column feeding output column i, or -1 when absent.
            int[] intArrMapping = new int[strColumnArr.Length];
            for (int i = 0; i < intArrMapping.Length; i++)
            {
                string strColumn = strColumnArr[i];
                int intColIndexInFile;
                if (dictColumn.TryGetValue(strColumn, out intColIndexInFile))
                {
                    intArrMapping[i] = intColIndexInFile;
                }
                else if (blnErrorOnNotFoundColumn)
                {
                    throw new KeyNotFoundException(
                        string.Format("找不到列“{0}”", strColumn));
                }
                else
                {
                    intArrMapping[i] = -1;
                }
            }

            foreach (string[] row in CsvCore.ReadCsvToEnd(csvDefine, sr))
            {
                string[] strResult = new string[intArrMapping.Length];
                for (int i = 0; i < intArrMapping.Length; i++)
                {
                    int intIndex = intArrMapping[i];
                    // Guard the upper bound too: a data row may have fewer cells than the header,
                    // in which case the missing cell is treated as empty instead of throwing.
                    if (intIndex >= 0 && intIndex < row.Length)
                    {
                        strResult[i] = row[intIndex];
                    }
                    else
                    {
                        strResult[i] = "";
                    }
                }
                yield return strResult;
            }
        }
        #endregion

        #region ReadRowsByHeader重载
        ///// <summary>
        ///// 将csv文件中的内容读取到DataTable
        ///// </summary>
        ///// <param name="csvDefine">csv格式定义 null则按默认格式处理</param>
        ///// <param name="strFilePath">文件路径</param>
        ///// <param name="strColumnArr">指定特定的表头</param>
        ///// <param name="blnErrorOnNotFoundColumn">找不到列抛出异常</param>
        ///// <returns>DataTable对象</returns>
        //public static IEnumerable<string[]> ReadRowsByHeader(
        //    CsvDefine csvDefine,
        //    string strFilePath,
        //    string[] strColumnArr,
        //    bool blnErrorOnNotFoundColumn = false)
        //{
        //    // Combining using with yield would release the file before the data is read, so this method was dropped
        //    using (FileStream fs = new FileStream(strFilePath, FileMode.Open, FileAccess.Read))
        //    {
        //        return ReadRowsByHeader(csvDefine, fs, strColumnArr, blnErrorOnNotFoundColumn);
        //    }
        //}

        /// <summary>
        /// Lazily reads data rows from a stream and projects each row onto the requested columns.
        /// </summary>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used with FristRowIsHeader set to true.</param>
        /// <param name="stream">Stream that supplies the CSV bytes; it is not disposed by this method and must stay open while the result is enumerated.</param>
        /// <param name="strColumnArr">Column names to extract.</param>
        /// <param name="blnErrorOnNotFoundColumn">When true, a requested column missing from the header raises an exception.</param>
        /// <returns>The sequence produced by the reader-based overload.</returns>
        public static IEnumerable<string[]> ReadRowsByHeader(
            CsvDefine csvDefine,
            Stream stream,
            string[] strColumnArr,
            bool blnErrorOnNotFoundColumn = false)
        {
            if (csvDefine == null)
            {
                // NOTE(review): this may mutate a shared default instance - confirm against CsvDefine.
                csvDefine = CsvDefine.Default;
                csvDefine.FristRowIsHeader = true;
            }

            // The reader is deliberately left undisposed: the caller owns the stream and disposes it.
            StreamReader reader = new StreamReader(stream, csvDefine.GetEncoding());
            IEnumerable<string[]> mappedRows =
                ReadRowsByHeader(csvDefine, reader, strColumnArr, blnErrorOnNotFoundColumn);
            return mappedRows;
        }
        #endregion

        #region ReadEntity<T>
        /// <summary>
        /// Lazily reads a CSV file and converts each data row into an object.
        /// </summary>
        /// <remarks>
        /// This method is an iterator. The file is opened when enumeration starts and is closed
        /// when enumeration finishes or the enumerator is disposed, so the file stays open for as
        /// long as rows are being read. (Returning the inner lazy sequence from inside a using
        /// block would close the file before the first row is read.)
        /// </remarks>
        /// <param name="csvDefine">CSV format definition; when null, the default format is used.</param>
        /// <param name="strFilePath">Path of the CSV file to read.</param>
        /// <param name="convertor">Converter; the first argument is the row's cells, the second is the header row (or null).</param>
        /// <param name="callback">Optional progress callback, forwarded to the reader-based overload.</param>
        /// <returns>The converted objects, in file order.</returns>
        public static IEnumerable<T> ReadEntity<T>(
            CsvDefine csvDefine,
            string strFilePath,
            Func<string[], string[], T> convertor,
            Action<int> callback = null)
        {
            // Disposing the reader also closes the underlying file stream.
            using (StreamReader sr = OpenFile(csvDefine, strFilePath))
            {
                foreach (T item in ReadEntity(csvDefine, sr, convertor, callback))
                {
                    yield return item;
                }
            }
        }

        /// <summary>
        /// Lazily reads CSV content from a stream and converts each data row into an object.
        /// </summary>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used.</param>
        /// <param name="stream">Stream that supplies the CSV bytes; it is not disposed by this method and must stay open while the result is enumerated.</param>
        /// <param name="convertor">Converter; the first argument is the row's cells, the second is the header row (or null).</param>
        /// <param name="callback">Optional progress callback, forwarded to the reader-based overload.</param>
        /// <returns>The converted objects, in stream order.</returns>
        public static IEnumerable<T> ReadEntity<T>(
            CsvDefine csvDefine,
            Stream stream,
            Func<string[], string[], T> convertor,
            Action<int> callback = null)
        {
            if (csvDefine == null)
            {
                csvDefine = CsvDefine.Default;
            }
            // The reader is deliberately left undisposed: the caller owns the stream and disposes it.
            StreamReader sr = new StreamReader(stream, csvDefine.GetEncoding());
            // Forward the callback; without it the progress notifications are silently lost.
            return ReadEntity(csvDefine, sr, convertor, callback);
        }

        /// <summary>
        /// Lazily reads CSV content from a reader and converts each data row into an object.
        /// </summary>
        /// <remarks>
        /// This method is an iterator, so nothing is read until the result is enumerated.
        /// Rows for which the converter returns null are skipped.
        /// </remarks>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used.</param>
        /// <param name="sr">Reader that supplies the CSV text.</param>
        /// <param name="convertor">Converter; the first argument is the row's cells, the second is the header row, which is null when no non-empty header was read.</param>
        /// <param name="callback">Optional progress callback; it receives the running count of yielded items and runs when the consumer resumes enumeration after receiving an item.</param>
        /// <returns>The converted objects, in reader order.</returns>
        public static IEnumerable<T> ReadEntity<T>(
            CsvDefine csvDefine,
            StreamReader sr,
            Func<string[], string[], T> convertor,
            Action<int> callback = null)
        {
            if (csvDefine == null)
            {
                csvDefine = CsvDefine.Default;
            }

            string[] header = null;
            if (csvDefine.FristRowIsHeader)
            {
                string[] firstRow = CsvCore.ReadOneCsvRow(csvDefine, sr);
                if (firstRow != null && firstRow.Length > 0)
                {
                    header = firstRow;
                }
            }

            int reported = 0;
            foreach (string[] cells in CsvCore.ReadCsvToEnd(csvDefine, sr))
            {
                T entity = convertor(cells, header);
                if (entity == null)
                {
                    continue;
                }

                yield return entity;

                if (callback == null)
                {
                    continue;
                }
                reported++;
                callback(reported);
            }
        }

        #endregion

        #region 其它

        /// <summary>
        /// Opens a CSV file for reading with the encoding supplied by the format definition.
        /// </summary>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used.</param>
        /// <param name="strFilePath">Path of the CSV file to open.</param>
        /// <returns>A reader over the file; the caller is responsible for disposing it.</returns>
        public static StreamReader OpenFile(CsvDefine csvDefine,
            string strFilePath)
        {
            if (csvDefine == null)
            {
                csvDefine = CsvDefine.Default;
            }
            FileStream fs = new FileStream(strFilePath, FileMode.Open, FileAccess.Read);
            try
            {
                return new StreamReader(fs, csvDefine.GetEncoding());
            }
            catch
            {
                // The reader never took ownership, so close the file handle before rethrowing.
                fs.Dispose();
                throw;
            }
        }
        #endregion

        #region 多文件读取

        /// <summary>
        /// Collects the union of the header names found in the first row of several CSV files.
        /// </summary>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used with FristRowIsHeader set to true. Otherwise FristRowIsHeader must already be true.</param>
        /// <param name="filePath">Paths of the files to inspect; null or empty yields an empty set.</param>
        /// <param name="blnErrorOnFileNotExist">When true, every path is opened without an existence check, so a missing file fails on open; when false, missing files are skipped.</param>
        /// <param name="callback">Optional progress callback, invoked once per path with its zero-based index and the path.</param>
        /// <returns>The distinct header names; the set uses the default string comparer.</returns>
        /// <exception cref="ArgumentException"><paramref name="csvDefine"/> does not have FristRowIsHeader set.</exception>
        public static HashSet<string> MReadHeader(
            CsvDefine csvDefine,
            ICollection<string> filePath,
            bool blnErrorOnFileNotExist = false,
            Action<int, string> callback = null)
        {
            HashSet<string> headers = new HashSet<string>();
            if (filePath.IsNullOrEmptyT())
            {
                return headers;
            }

            if (csvDefine == null)
            {
                // NOTE(review): this may mutate a shared default instance - confirm against CsvDefine.
                csvDefine = CsvDefine.Default;
                csvDefine.FristRowIsHeader = true;
            }
            else if (!csvDefine.FristRowIsHeader)
            {
                throw new ArgumentException("csvDefine的FristRowIsHeader必须设置为true");
            }

            int position = 0;
            foreach (string path in filePath)
            {
                bool shouldOpen = blnErrorOnFileNotExist || File.Exists(path);
                if (shouldOpen)
                {
                    using (StreamReader reader = CsvReader.OpenFile(csvDefine, path))
                    {
                        string[] firstRow = CsvCore.ReadOneCsvRow(csvDefine, reader);
                        if (firstRow != null)
                        {
                            headers.UnionWith(firstRow);
                        }
                    }
                }

                // Progress is reported for every path, including skipped ones.
                if (callback != null)
                {
                    callback(position, path);
                }
                position++;
            }

            return headers;
        }

        /// <summary>
        /// Lazily reads the data rows of several CSV files, one file after another.
        /// </summary>
        /// <remarks>
        /// This method is an iterator: files are opened only while the result is enumerated, and
        /// each file stays open until its rows have been consumed.
        /// </remarks>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used. When FristRowIsHeader is set, the first row of each file is discarded.</param>
        /// <param name="filePath">Paths of the files to read; null or empty yields an empty sequence.</param>
        /// <param name="blnErrorOnFileNotExist">When true, every path is opened without an existence check, so a missing file fails on open; when false, missing files are skipped.</param>
        /// <param name="callback">Optional progress callback, invoked once per path (after its rows have been yielded) with its zero-based index and the path.</param>
        /// <returns>The rows of all files, in file order.</returns>
        public static IEnumerable<string[]> MReadRows(
            CsvDefine csvDefine,
            ICollection<string> filePath,
            bool blnErrorOnFileNotExist = false,
            Action<int, string> callback = null)
        {
            // Same guard as the other multi-file readers: no paths means no rows,
            // rather than a NullReferenceException from the foreach below.
            if (filePath.IsNullOrEmptyT())
            {
                yield break;
            }
            if (csvDefine == null)
            {
                csvDefine = CsvDefine.Default;
            }
            int intFileIndex = 0;
            foreach (var file in filePath)
            {
                if (blnErrorOnFileNotExist || File.Exists(file))
                {
                    using (var sr = CsvReader.OpenFile(csvDefine, file))
                    {
                        if (csvDefine.FristRowIsHeader)
                        {
                            // The header row is read and discarded.
                            CsvCore.ReadOneCsvRow(csvDefine, sr);
                        }
                        var rows = CsvCore.ReadCsvToEnd(csvDefine, sr);
                        foreach (var row in rows)
                        {
                            yield return row;
                        }
                    }
                }
                if (callback != null)
                {
                    callback(intFileIndex, file);
                }
                intFileIndex++;
            }
        }

        /// <summary>
        /// Lazily reads the data rows of several CSV files and projects each row onto the
        /// requested columns, using each file's own header row for the mapping.
        /// </summary>
        /// <remarks>
        /// This method is an iterator: argument validation and all file access happen only while
        /// the returned sequence is being enumerated.
        /// </remarks>
        /// <param name="csvDefine">CSV format definition; when null, <c>CsvDefine.Default</c> is used with FristRowIsHeader set to true. Otherwise FristRowIsHeader must already be true.</param>
        /// <param name="filePath">Paths of the files to read; null or empty yields an empty sequence.</param>
        /// <param name="strColumnArr">Column names to extract, matched case-insensitively against each file's header; the output follows this order.</param>
        /// <param name="blnErrorOnNotFoundColumn">When true, a requested column missing from a file's header raises an exception; when false it is returned as an empty string.</param>
        /// <param name="blnErrorOnFileNotExist">When true, every path is opened without an existence check, so a missing file fails on open; when false, missing files are skipped.</param>
        /// <param name="callback">Optional progress callback, invoked once per path (after its rows have been yielded) with its zero-based index and the path.</param>
        /// <returns>
        /// One array per data row with the same length as <paramref name="strColumnArr"/>. A column
        /// that is absent from the header, or that lies beyond the end of a short row, yields an empty string.
        /// </returns>
        /// <exception cref="NotSupportedException"><paramref name="strColumnArr"/> is null or empty.</exception>
        /// <exception cref="ArgumentException"><paramref name="csvDefine"/> does not have FristRowIsHeader set.</exception>
        /// <exception cref="KeyNotFoundException">A requested column is missing from a file and <paramref name="blnErrorOnNotFoundColumn"/> is true.</exception>
        public static IEnumerable<string[]> MReadRowsByHeader(
            CsvDefine csvDefine,
            ICollection<string> filePath,
            string[] strColumnArr,
            bool blnErrorOnNotFoundColumn = false,
            bool blnErrorOnFileNotExist = false,
            Action<int, string> callback = null)
        {
            if (filePath.IsNullOrEmptyT())
            {
                yield break;
            }
            if (strColumnArr.IsNullOrEmpty())
            {
                throw new NotSupportedException("未指定strColumnArr不能使用该方法，尝试使用MReadCsvDataRows方法");
            }
            if (csvDefine == null)
            {
                // NOTE(review): if CsvDefine.Default returns a shared instance, this assignment
                // changes it for every other user - confirm against CsvDefine.
                csvDefine = CsvDefine.Default;
                csvDefine.FristRowIsHeader = true;
            }
            else if (!csvDefine.FristRowIsHeader)
            {
                throw new ArgumentException("csvDefine的FristRowIsHeader必须设置为true");
            }
            int intFileIndex = 0;
            foreach (var file in filePath)
            {
                if (blnErrorOnFileNotExist || File.Exists(file))
                {
                    using (var sr = CsvReader.OpenFile(csvDefine, file))
                    {
                        string[] strHeader = CsvCore.ReadOneCsvRow(csvDefine, sr);
                        // A file without a header contributes no rows. This is a nested block
                        // rather than "continue" so the progress callback and the file index
                        // below are still updated for such files.
                        if (!strHeader.IsNullOrEmpty())
                        {
                            // Header name -> column index. Walking backwards lets the first
                            // occurrence of a duplicated header name win.
                            Dictionary<string, int> dictColumn = new Dictionary<string, int>(
                                StringComparer.OrdinalIgnoreCase);
                            for (int i = strHeader.Length - 1; i >= 0; i--)
                            {
                                dictColumn[strHeader[i]] = i;
                            }

                            // intArrMapping[i] is the file column feeding output column i, or -1 when absent.
                            int[] intArrMapping = new int[strColumnArr.Length];
                            for (int i = 0; i < intArrMapping.Length; i++)
                            {
                                string strColumn = strColumnArr[i];
                                int intColIndexInFile;
                                if (dictColumn.TryGetValue(strColumn, out intColIndexInFile))
                                {
                                    intArrMapping[i] = intColIndexInFile;
                                }
                                else if (blnErrorOnNotFoundColumn)
                                {
                                    // Same exception type as the single-file ReadRowsByHeader.
                                    throw new KeyNotFoundException(
                                        string.Format("在文件“{0}”中找不到列“{1}”", file, strColumn));
                                }
                                else
                                {
                                    intArrMapping[i] = -1;
                                }
                            }

                            foreach (string[] row in CsvCore.ReadCsvToEnd(csvDefine, sr))
                            {
                                string[] strResult = new string[intArrMapping.Length];
                                for (int i = 0; i < intArrMapping.Length; i++)
                                {
                                    int intIndex = intArrMapping[i];
                                    // Guard the upper bound too: a data row may have fewer cells
                                    // than the header, in which case the cell is treated as empty.
                                    if (intIndex >= 0 && intIndex < row.Length)
                                    {
                                        strResult[i] = row[intIndex];
                                    }
                                    else
                                    {
                                        strResult[i] = "";
                                    }
                                }
                                yield return strResult;
                            }
                        }
                    }
                }
                if (callback != null)
                {
                    callback(intFileIndex, file);
                }
                intFileIndex++;
            }
        }

        #endregion
    }
}
